/* ///////////////////////////////////////////////////////////////////////// */
/*  This is part of the source of the OMAP 5912 heterogeneous dual-core      */
/*  MPEG-4 SP video decoder published in ACM Transactions on Embedded        */
/*  Computing Systems, Vol. X, Issue Y.                                      */
/* ------------------------------------------------------------------------- */
/*  The source code is released under GPL license.                           */
/*                                                                           */
/*  Copyright, 2011                                                          */
/*  Multimedia Embedded Systems Labs                                         */
/*  Dept. of Computer Science                                                */
/*  National Chiao Tung University                                           */
/*  Hsinchu, Taiwan.                                                         */
/* ///////////////////////////////////////////////////////////////////////// */

/*
 * m4v_dec.c
 *    -- an mpeg-4 video decoder for HW/SW Co-design, CJT 05-01-2004
 *
 * This is an mpeg-4 simple profile video decoder based on the
 * xvid 0.9. The xvid library was redesigned and rewritten quite
 * a bit to fix conformance issues as well as to simplify the code
 * for easy porting for embedded applications.  Since Xvid is
 * covered by the GPL, the modified source code of the library
 * remains freely available under the GPL.  You are free to redistribute
 * and use the source code following the GPL guidelines.
 *
 * This program is designed for the class "Embedded Firmware and
 * Hardware/Software Co-design,"  Dept. of Computer Science and
 * Information Engineering National Chiao Tung University,
 * 1001 Ta-Hsueh Rd. Hsinchu, 30010, Taiwan
 *
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "m4vdec_api.h"
#include "image.h"
#include "get_time.h"
#include "mem_address.h"
//////////////////////////////////////////////////////
#include "dma_api_DSP.h"
#include "mbx_command.h"
//////////////////////////////////////////////////////

/*
 * Coefficient table passed to IMG_ycbcr422_rgb565() by IMG_display().
 * NOTE(review): the values look like fixed-point colour-conversion constants
 * (0x2000 presumably representing 1.0) -- confirm the expected order and
 * scaling against the IMG_ycbcr422_rgb565 documentation.
 */
const short coeff[7] =
    { 0x2000, 0x2bdd, 0x2000, -0x08c5, -0x1258, 0x2000, 0x3770 };
/*
 * DMA status flags, defined in another translation unit.  Code in this file
 * busy-waits while flag[n] == 0 before programming DMA channel n.
 */
extern volatile int flag[6];

/* Number of luma rows IMG_display() loads and converts per outer-loop pass. */
#define DISPLAY_ROW_SIZE 40

/* Routines implemented outside this file. */
void install_arm2dsp_isr();
void dsp_announce(uint16 command, uint16 data);
void enable_global_interrupt(); /* defined in dma_api.c        */
void i_cache_open(void);        /* defined in Icache_example.c */
void i_cache_close(void);       /* defined in Icache_example.c */

/*
 * IMG_display -- convert the current decoded picture to RGB565 and DMA it
 * into the frame buffer.
 *
 *   vdec_obj : decoder control object; vdec_obj->handle is used as a
 *              DECODER * whose cur.y / cur.u / cur.v planes are the source.
 *   num_352  : per-row advance applied to the luma source pointer
 *              (main() passes 352).  The DMA frame indices and the chroma
 *              pointer advance are hard-coded for a 352 / 176 layout, so
 *              only that value is consistent with the rest of the routine.
 *
 * The picture is handled in 240 / DISPLAY_ROW_SIZE strips.  For each strip
 * the Y, U and V rows are DMA-ed into local buffers, then every row is
 * converted with IMG_ycbcr422_rgb565() into one of two alternating line
 * buffers and sent to the frame buffer on DMA channel 5.
 */
void
IMG_display(DEC_CTRL *vdec_obj, int num_352)
{
    enum
    {
        OUT_WIDTH     = 320,    /* pixels converted per row                */
        OUT_ROWS      = 240,    /* rows converted per picture              */
        LUMA_STRIDE   = 352,    /* luma row pitch assumed by the DMA setup */
        CHROMA_WIDTH  = 160,    /* chroma samples fetched per chroma row   */
        CHROMA_STRIDE = 176     /* chroma row pitch assumed by DMA setup   */
    };

    DECODER *dec = (DECODER *) vdec_obj->handle;
    dma_port src_obj, dst_obj;
    dma_channel channel_obj;

    uint8  *external_y = dec->cur.y;
    uint8  *external_u = dec->cur.u;
    uint8  *external_v = dec->cur.v;

    /* Output starts OUT_ROWS - 1 past the frame-buffer base and steps back
     * by one uint16 for every converted row (see the inner loop). */
    uint16 *frame_buffer =
        (uint16 *) (FRAME_BUFFER_address + (OUT_ROWS - 1));

    /* Local buffer layout: U strip at 0x200, V strip right after it, then
     * the two RGB line buffers.  These bases are computed once and reused
     * by every strip instead of being re-declared (and shadowed) inside
     * the loop. */
    uint16 *strip_u = (uint16 *) (0x200);
    uint16 *strip_v = strip_u + (DISPLAY_ROW_SIZE >> 1) * CHROMA_WIDTH;
    uint16 *rgb_data[2];
    int     i, j;

    rgb_data[0] = strip_v + (DISPLAY_ROW_SIZE >> 1) * CHROMA_WIDTH;
    rgb_data[1] = rgb_data[0] + OUT_WIDTH;

    for (i = 0; i < (OUT_ROWS / DISPLAY_ROW_SIZE); i++)  /* 6 strips */
    {
        uint16 *internl_y = (uint16 *) local_cur_y_address;
        uint16 *internl_u = strip_u;
        uint16 *internl_v = strip_v;

        /*
         * NOTE(review): only dst_obj.staddr is set for the three plane
         * loads below.  dst_obj's index fields are uninitialised on the
         * first strip and keep the RGB-output values on later strips --
         * confirm that dma_api_sds_dp()/dma_api_sdd_dp() ignore them.
         */

        /* ---- Y strip -> local buffer, channel 1 ---- */
        src_obj.staddr = (uint32) external_y;
        /* frame index = (stride - element count) * 2 + 1 */
        src_obj.addressing_frmIndex = (LUMA_STRIDE - OUT_WIDTH) * 2 + 1;
        src_obj.addressing_elmIndex = 1;
        dst_obj.staddr = (uint32) internl_y;
        channel_obj.elmCnt = OUT_WIDTH;
        channel_obj.frmCnt = DISPLAY_ROW_SIZE;
        while (flag[1] == 0)
            ;                   /* wait until channel 1 is available */
        channel_obj.channel_number = 1;
        dma_api_sds_dp(src_obj, dst_obj, channel_obj);
        external_y =
            (uint8 *) ((uint32) external_y +
                       (uint32) ((uint32) num_352 * (uint32) DISPLAY_ROW_SIZE));

        /* ---- U strip -> local buffer, channel 4 ---- */
        src_obj.staddr = (uint32) external_u;
        src_obj.addressing_frmIndex = (CHROMA_STRIDE - CHROMA_WIDTH) * 2 + 1;
        src_obj.addressing_elmIndex = 1;
        dst_obj.staddr = (uint32) internl_u;
        channel_obj.elmCnt = CHROMA_WIDTH;
        channel_obj.frmCnt = DISPLAY_ROW_SIZE >> 1;
        while (flag[4] == 0)
            ;                   /* wait until channel 4 is available */
        channel_obj.channel_number = 4;
        dma_api_sdd_dp(src_obj, dst_obj, channel_obj);
        external_u += CHROMA_STRIDE * (DISPLAY_ROW_SIZE >> 1);

        /* ---- V strip -> local buffer, channel 1 again ---- */
        src_obj.staddr = (uint32) external_v;
        src_obj.addressing_frmIndex = (CHROMA_STRIDE - CHROMA_WIDTH) * 2 + 1;
        src_obj.addressing_elmIndex = 1;
        dst_obj.staddr = (uint32) internl_v;
        channel_obj.elmCnt = CHROMA_WIDTH;
        channel_obj.frmCnt = DISPLAY_ROW_SIZE >> 1;
        while (flag[1] == 0)
            ;                   /* the Y transfer must have released ch. 1 */
        channel_obj.channel_number = 1;
        dma_api_sdd_dp(src_obj, dst_obj, channel_obj);
        external_v += CHROMA_STRIDE * (DISPLAY_ROW_SIZE >> 1);

        /* Both input channels must report ready before the strip is read. */
        while (flag[4] == 0)
            ;
        while (flag[1] == 0)
            ;

        for (j = 0; j < DISPLAY_ROW_SIZE; j++)
        {
            /* Alternate between the two line buffers so the conversion of
             * row j does not write the buffer used for row j - 1. */
            uint16 *rgb_line = rgb_data[j % 2];

            IMG_ycbcr422_rgb565(coeff, internl_y, internl_u, internl_v,
                                rgb_line, OUT_WIDTH);
            internl_y += OUT_WIDTH;
            if (j & 1)
            {
                /* One chroma row serves two luma rows. */
                internl_u += CHROMA_WIDTH;
                internl_v += CHROMA_WIDTH;
            }

            /*
             * Send the row as OUT_WIDTH frames of one element each, with a
             * destination frame index of (OUT_ROWS - 1) * 2 + 1, i.e.
             * (stride - element count) * 2 + 1 for a stride of OUT_ROWS.
             * NOTE(review): this presumably writes each picture row as one
             * column of a 240-wide frame buffer -- confirm against the LCD
             * layout.
             */
            src_obj.staddr = (uint32) rgb_line;
            dst_obj.staddr = (uint32) frame_buffer;
            dst_obj.addressing_frmIndex = (OUT_ROWS - 1) * 2 + 1;
            dst_obj.addressing_elmIndex = 1;
            channel_obj.elmCnt = 1;
            channel_obj.frmCnt = OUT_WIDTH;
            while (flag[5] == 0)
                ;               /* previous row's output must be finished */
            channel_obj.channel_number = 5;
            dma_api_dsd_pd(src_obj, dst_obj, channel_obj);
            frame_buffer -= 1;
        }
    }

    /*
     * NOTE(review): the function returns without waiting on flag[5], so the
     * transfer of the final row may still be in progress when the caller
     * continues -- confirm that this is acceptable.
     */
}

/*
 * main -- DSP-side service loop.
 *
 * Initialises the platform, announces D2A_READY, then polls the request
 * flags until decode_over becomes non-zero:
 *   - decode_slice_flag : decode one slice (P-slice when is_pslice is set,
 *                         I-slice otherwise) and reply with
 *                         D2A_DECODING_SLICE_DONE and the slice number;
 *   - display_flag      : run IMG_display() and reply with D2A_DISPLAY_DONE.
 *
 * The flags are defined in another translation unit; presumably they are
 * set by the handler installed by install_arm2dsp_isr() -- confirm.
 *
 * Returns 0 after the instruction cache has been closed.
 */
int
main(int arc, char *arv[])
{
    extern volatile int decode_over;
    extern volatile int decode_slice_flag;
    extern volatile int display_flag;
    extern DEC_CTRL vdec_obj;
    extern Bitstream bs_slice[];
    extern uint32 rounding;
    extern uint32 quant;
    extern uint32 fcode;
    extern uint32 intra_dc_threshold;

    extern volatile int slice_num;
    extern volatile int is_pslice;

    /* Command-line arguments are not used. */
    (void) arc;
    (void) arv;

    /*
     * Clear the request flags BEFORE the ISR is installed and D2A_READY is
     * announced.  Clearing them afterwards leaves a window in which a
     * request that arrives right after the announcement is wiped out and
     * never serviced.
     */
    decode_over = 0;
    decode_slice_flag = 0;
    display_flag = 0;

    enable_global_interrupt();
    i_cache_open();
    install_arm2dsp_isr();
    dsp_announce(D2A_READY, MBX_NO_DATA);

    while (!decode_over)
    {
        while (decode_slice_flag)
        {
            /* Read the volatile slice number once so that the slice that
             * is decoded is also the one reported as done. */
            const int slice = slice_num;

            if (is_pslice)
            {
                decoder_pslice(&vdec_obj, &bs_slice[slice], rounding, quant,
                               fcode, intra_dc_threshold, slice);
            }
            else
            {
                decoder_islice(&vdec_obj, &bs_slice[slice], quant,
                               intra_dc_threshold, slice);
            }
            /* Clear the request before replying so that a new request
             * issued in response to the reply is not overwritten. */
            decode_slice_flag = 0;
            dsp_announce(D2A_DECODING_SLICE_DONE, slice);
        }

        while (display_flag)
        {
            IMG_display(&vdec_obj, 352);
            display_flag = 0;
            dsp_announce(D2A_DISPLAY_DONE, 0);
        }

        /* we should do a usleep() here, but the BSP does not support it */
    }
    i_cache_close();
    return 0;
}
